### This project replicates the analyses reported in Toshkov and Krouwel (2022) 'Beyond the U-Curve: Citizen Preferences on European Integration in Multidimensional Political Space' in European Union Politics 
### Part II: This script prepares and analyzes the data from the 2019 EP elections in Germany
# Clean up VAA data and make variables -------------------------------------------------------------------------
## This part of the script shows the preparation of the datafile that is shared from the original datafile that is proprietary
# Load the data
de.all <- read_sav('./data in/de ep 2019/DE EU19 AN.sav')

# Recode variables and filter
de.all <- de.all %>%
  mutate(lr = popup_answer_1047 - 5,
         proeu = popup_answer_1051 - 5,
         progr = popup_answer_1049 - 5,
         ep2019.vote.intent = recode (popup_answer_1044, '0' = 'CDU-CSU', '1' = 'SPD', '2' = 'Die Gruenen', '3' = 'AfD', '4' = 'Die Linke', '5' = 'FDP', 
                                      '6' = 'FW', '7' = 'Die Partei', '8' = 'Piraten', '9' = 'Other', '10' = 'Other', '11' = 'Other', '12' = 'Volt', 
                                      '13' = 'Other', '14' = 'Other',  '15' = 'Other', '16' = 'Other',
                                      '17' = 'No vote', '18' = NA_character_),
         nat.elections.vote.intent = recode (popup_answer_1046, '0' = 'CDU-CSU', '1' = 'SPD', '5' = 'Die Gruenen', '2' = 'AfD', '4' = 'Die Linke', '3' = 'FDP', 
                                      '6' = 'FW', '9' = 'Other',
                                      '7' = 'No vote', '8' = NA_character_),
         nat.elections.2017.vote = recode (popup_answer_1048, '0' = 'CDU-CSU', '1' = 'SPD', '2' = 'Die Gruenen', '3' = 'AfD', '4' = 'Die Linke', '5' = 'FDP', 
                                           '6' = 'FW', '7' = 'Other',
                                           '8' = 'No vote', '9' = NA_character_),
         age = as.numeric(as.character(ifelse (bg_question_429==84, NA_character_, 2019 - (2003 - bg_question_429)))),
         gender = recode (bg_question_428, '0' = 'man', '1' = 'woman', '2' = NA_character_, '3' = NA_character_),
         edu = recode (bg_question_430, '5' = 'high', '4' = 'mid', '3' = 'mid', '2' = 'mid', '1' = 'low',
                       '0' = 'low', '6' = NA_character_),
         themes = case_when ((relevant_themes_252 == 1 ~ 'asylum_integration'),  (relevant_themes_257 == 1 ~ 'environment'),
                             (relevant_themes_284 == 1 ~ 'society'), (relevant_themes_289 == 1 ~ 'european_integration'),
                             (relevant_themes_295 == 1 ~ 'economy')),
         type = 'voter',
         eu.bad = statement_answer_5000 - 3,
         eu.voteage.16 = statement_answer_5028 - 3,
         state.econ.out = statement_answer_5010 - 3,
         islam.bad = statement_answer_5008 -3,
         redistribute = statement_answer_5011 -3,
         save.illegal.migrants = statement_answer_5022 -3,
         eu.study.fees = statement_answer_5027 -3,
         weed.legal = statement_answer_5005 -3,
         privacy.limit = statement_answer_5006 - 3,
         ban.glyphosate = statement_answer_5017 - 3,
         eu.enlarge = statement_answer_5020 - 3,
         greener.energy = statement_answer_5013 -3,
         nat.borders = statement_answer_5026 -3,
         eu.promote.organic = statement_answer_5029 -3,
         eu.tax = statement_answer_5015 -3,
         eu.enforce = statement_answer_5016 -3,
         eu.veto = statement_answer_5003 -3,
         eu.min.wage = statement_answer_5024 -3,
         euro.out = statement_answer_5001 - 3,
         abortion.free = statement_answer_5009 - 3,
         eu.borrow.more = statement_answer_5021 - 3,
         eu.citizens = statement_answer_4998 - 3,
         eu.fp = statement_answer_5002 -3,
         eu.common.mintax =  statement_answer_5025 -3,
         eu.army = statement_answer_5019 -3,
         eu.fight.illegal.immigration  = statement_answer_5018 -3,
         eu.fin.solidarity = statement_answer_4999 -3,
         immigrants.adapt = statement_answer_5007 -3,
         sanction.russia = statement_answer_5023 -3,
         eu.no.pollution.caps = statement_answer_5030 -3,
         co2.tax = as.numeric(as.character(ifelse (popup_answer_1054 > 4, NA_character_, (popup_answer_1054 - 2)*-1))),
         copyright.check = as.numeric(as.character(ifelse (popup_answer_1053 > 4, NA_character_, (popup_answer_1053 - 2)))),
         homo.marry = as.numeric(as.character(ifelse (popup_answer_1052 > 4, NA_character_, (popup_answer_1054 - 2)*-1)))
         ) %>%
  filter (country == 'Germany', amount_visits == 1,
          seconds_on_statements > 30*3, seconds_on_statements < 30*30) %>%
  select (lr:ncol(.))

de.all$lr <- ifelse (de.all$lr==6, NA, de.all$lr)
de.all$progr <- ifelse (de.all$progr==6, NA, de.all$progr)
de.all$proeu <- ifelse (de.all$proeu==6, NA, de.all$proeu)

# remove those who answered everything with Neutral
de.all$zeroes <- de.all %>% 
  select (eu.bad:homo.marry)  %>%
  abs () %>%
  rowSums(na.rm=T)

de.all <- de.all %>% filter (zeroes!=0) %>% select (-zeroes)


# create deductively derived positions on scales
de.all <- de.all %>%
  mutate (obj.lr = (state.econ.out - eu.fin.solidarity  - eu.tax - eu.min.wage - eu.study.fees - redistribute)/6,
          obj.cp = (- islam.bad + save.illegal.migrants + weed.legal - privacy.limit + ban.glyphosate + greener.energy +
                      abortion.free - immigrants.adapt)/8,
          obj.eu = (- eu.bad + eu.enlarge - nat.borders + eu.tax + eu.enforce - eu.veto + eu.min.wage - euro.out +
                      eu.borrow.more - eu.citizens + eu.fp + eu.common.mintax + eu.army + eu.fight.illegal.immigration +
                      eu.fin.solidarity)/15,
          obj.eu2 = (- eu.bad + eu.enlarge - nat.borders + eu.tax + eu.enforce - eu.veto + eu.min.wage - euro.out + 
                       eu.borrow.more - eu.citizens + eu.fp + eu.common.mintax + eu.army + eu.fight.illegal.immigration + 
                       eu.fin.solidarity + eu.promote.organic - eu.no.pollution.caps + eu.study.fees + eu.voteage.16)/19
  )

de.all$id = seq.int(nrow(de.all)) #index for merging later

# save
save (de.all, file = './data out/de2019_replication.RData')
# Load and subset the data (start replication here) -------------------------------------------------------------------------
load (file = './data out/de2019_replication.RData')

# PCA and Factor analysis ----------------------------------------------------------------
de.s <- de.all %>% select(id, eu.bad:eu.no.pollution.caps) %>%
  filter(complete.cases(.))

# get polychoric covariance
tk.poly<-polychoric(select(de.s, -id))

# determine number of factors
ev <- eigen(cor(select(de.s, -id)))
ev.poly <- eigen(tk.poly$rho) #with the polychronic correlations
# number of factors

ap <- parallel(subject=nrow(de.s),var=ncol(select(de.s, -id)),rep=10,quantile=.95)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)

# principal components (Table A10)
pca.1 <- principal(select(de.s, -id), nfactors=5, rotate='none', cor='poly', scores=TRUE)
print(pca.1, digits=2, cut=.39, sort=T)

pca.out<-as.data.frame(unclass(pca.1$loadings))
pca.out <- pca.out %>%
  mutate_at(vars(starts_with('PC')), ~replace(., abs(.) < 0.4, 0)) %>%
  mutate_at(vars(starts_with('PC')), funs(round(., 2))) %>%
  arrange (-abs(PC1),-abs(PC2), -abs(PC3), -abs(PC4), -abs(PC5)) %>%
  mutate_at(vars(starts_with('PC')), ~replace(., abs(.) == 0, '')) %>%
  mutate ( issue = rownames(.)) %>%
  relocate (issue) 
pca.out <- rbind (pca.out, c('Proportion Variance', paste0(round(prop.table(pca.1$values)[1:5]*100,0), '%')))
pca.out

pca.de.table = htmlTable::htmlTable(pca.out, rnames = FALSE,
                                    col.columns = c('white','lightgrey'),
                                    tfoot = 'Note: Loadings smaller than +/-0.41 are not printed',
                                    align = c('l','r','r','r','r','r'),
                                    align.header = c('l','r','r','r','r','r'),
                                    header = c('Policy issue','PC1','PC2','PC3','PC4','PC5'),
                                    caption = "Table X. PCA results from Germany, 2019")
print(pca.de.table, type = "html", file = './tables/pca_de_table.html')

# factor analysis with polychoric correlations and oblique rotation (Table 2)
fa.1 <- fa (select(de.s, -id), nfactors=5, rotate='Promax', cor='poly', scores=TRUE)
print(fa.1, digits=2, cut=.33, sort=T)

fa.out<-as.data.frame(unclass(fa.1$loadings))
fa.out <- fa.out %>%
  mutate_at(vars(starts_with('MR')), ~replace(., abs(.) < 0.31, 0)) %>%
  mutate_at(vars(starts_with('MR')), funs(round(., 2))) %>%
  arrange (-abs(MR2),-abs(MR1), -abs(MR3), -abs(MR5), -abs(MR4)) %>%
  mutate_at(vars(starts_with('MR')), ~replace(., abs(.) == 0, '')) %>%
  mutate ( issue = rownames(.)) %>%
  relocate (issue) 
fa.out <- rbind (fa.out, c('Proportion Variance', paste0(round(fa.1$Vaccounted[2,]*100,0), '%')))

fa.out

fa.de.table = htmlTable::htmlTable(fa.out, rnames = FALSE,
                                   col.columns = c('white','lightgrey'),
                                   tfoot = 'Note: Loadings smaller than +/-0.31 are not printed',
                                   align = c('l','r','r','r','r','r','r'),
                                   align.header = c('l','r','r','r','r','r','r'),
                                   header = colnames(fa.out),
                                   caption = "Table X. Factor Analysis results from Germany, 2019")
print(fa.de.table, type = "html", file = './tables/fa_de_table.html')


# Run the same factor model but only on the subset with party affiliation (Table A11)
de.s2 <- de.all %>%
  select (id, lr, progr, proeu, obj.lr, obj.cp, obj.eu, obj.eu2, eu.bad:eu.no.pollution.caps) %>%
  na.omit 

# factors with fa.poly and promax rotation
fa.2 <- fa (select(de.s2, eu.bad:eu.no.pollution.caps), nfactors=5, rotate='Promax', cor='poly', scores=TRUE)
print(fa.2, digits=2, cut=.33, sort=T)


fa2.out<-as.data.frame(unclass(fa.2$loadings))
fa2.out <- fa2.out %>%
  mutate_at(vars(starts_with('MR')), ~replace(., abs(.) < 0.31, 0)) %>%
  mutate_at(vars(starts_with('MR')), funs(round(., 2))) %>%
  arrange (-abs(MR1),-abs(MR2), -abs(MR3), -abs(MR4), -abs(MR5)) %>%
  mutate_at(vars(starts_with('MR')), ~replace(., abs(.) == 0, '')) %>%
  mutate ( issue = rownames(.)) %>%
  relocate (issue) 

fa2.out <- rbind (fa2.out, c('Proportion Variance', paste0(round(fa.2$Vaccounted[2,]*100,0), '%')))

fa2.de.table = htmlTable::htmlTable(fa2.out, rnames = FALSE,
                                    col.columns = c('white','lightgrey'),
                                    tfoot = 'Note: Loadings smaller than +/-0.31 are not printed',
                                    align = c('l','r','r','r','r','r','r'),
                                    align.header = c('l','r','r','r','r','r','r'),
                                    header = colnames(fa2.out),
                                    caption = "Table SS5. Factor Analysis results from Germany, 2019")
print(fa2.de.table, type = "html", file = './tables/fa2_de_table.html')

# assign scores to cases
de.s <- bind_cols(de.s, data.frame(pca.1$scores), data.frame(fa.1$scores))

de.all <- left_join (de.all, select(de.s, id, PC1:MR4), by='id')

# save the file with the factor scores

# Subset -------------------------------------------------
de.s <- de.all %>% 
  filter (type=='voter') %>% 
  select ('lr','progr','proeu') %>% 
  filter(complete.cases(.))


# Self-placement analysis: 3D plots -------------------------------------------------
#prepare data
elevation.df = data.frame(x = de.s$lr, y = de.s$progr, z = de.s$proeu)
elevation.loess = loess(z ~ x*y, data = elevation.df, degree=2, span=0.4)
xmin<- -5
xmax<- 5
ymin<- -5
ymax<- 5

elevation.fit = expand.grid(list(x = seq(xmin, xmax, 0.1), y = seq(ymin, ymax, 0.1)))
z = predict(elevation.loess, newdata = elevation.fit)
elevation.fit$Height = as.numeric(z)

#prepare matrix with x and y and rows/cols and z as the cell values
u1<-unique(elevation.fit$x)
u2<-unique(elevation.fit$y)
temp.data<-matrix(NA, nrow=length(u1), ncol=length(u2))

for (i in 1:length(u1)){
  for (j in 1:length(u2)){
    temp.data[i,j]<-elevation.fit$Height[elevation.fit$x==u1[i] & elevation.fit$y==u2[j]]
  }
}

ix <- seq(1, nrow(temp.data), length.out = 11)
iy <- seq(1, ncol(temp.data), length.out = 11)

# 3D plot in black and white (Figure 4, left)
png('./figures/de 2019 eu3.png', width=900*2, height=700*2, res=96)
par(mfrow = c(1, 1), mar = c(0, 0, 0, 0))
persp3D(z = temp.data, theta = 45, phi = 10, facets = T, curtain = TRUE, 
        shade=0.8, col = ramp.col(c("white", "black")), border = "grey", colkey = FALSE, xlab="Left-Right", ylab="Conservative-Progressive", zlab="Anti-Pro EU")
dev.off()

# 4 3D plots together (Figure A8 top)
png('./figures/de 2019 eu6.png', width=900*2, height=700*2, res=96)
par(mfrow = c(2, 2), mar = c(3, 3, 3, 3))

persp3D(z = temp.data, theta = 55, phi = 20, facets = T, curtain = TRUE, 
        shade=0.8, colkey = FALSE, xlab="Left-Right", ylab="Conservative-Progressive", zlab="Anti-Pro EU", box=T, bty='b2',ticktype = "detailed")

persp3D(z = temp.data, theta = 45, phi = 10, facets = T, curtain = TRUE, 
        shade=0.8, col = ramp.col(c("white", "black")), border = "grey", colkey = FALSE, xlab="Left-Right", ylab="Conservative-Progressive", zlab="Anti-Pro EU")

ribbon3D(z = temp.data[ix, ], along = "y", cex.axis = 0.8, 
         curtain = T, space = 0.7, theta = 25, phi = 5, shade=0.2, colkey = FALSE, xlab="Left-Right", ylab="Conservative-Progressive", zlab="Anti-Pro EU", 
         box=T, bty='b2', ticktype = "detailed")

hist3D(z = temp.data[ix,iy], theta = 55, phi = 20, shade=0.2, colkey = FALSE, box=T, bty='b2',ticktype = "detailed",
       xlab="Left-Right", ylab="Conservative-Progressive", zlab="Anti-Pro EU")

dev.off()

# now with the rayshader package (Figure A8 bottom)
temp.data2 = temp.data*300

raymat = ray_shade(temp.data2)
ambmat = ambient_shade(temp.data2)

png('./figures/raysahder_de.png', width=1000, height=800, res=96)

temp.data2 %>%
  sphere_shade(texture = "desert") %>%
  add_water(detect_water(temp.data2), color="desert") %>%
  add_shadow(ray_shade(temp.data2,zscale=3,maxsearch = 300),0.5) %>%
  add_shadow(ambmat,0.5) %>%
  plot_3d(temp.data2,zscale=35,fov=0,theta=-197,zoom=0.75,phi=10, windowsize = c(1000,800),
          water=TRUE, waterdepth = 0, wateralpha = 0.5,watercolor = "lightblue",
          waterlinecolor = "white",waterlinealpha = 0.5)


render_label(temp.data2, x=1,y=1, z=1100, zscale=35,relativez=FALSE, text = "Left, Conservative",textsize = 1.7,linewidth = 2,freetype = FALSE,
             linecolor = 'darkred', textcolor = "darkred") 
render_label(temp.data2, x=101,y=1, z=1200, zscale=35,relativez=FALSE,
             linecolor = 'darkblue', textcolor = "darkblue", text = "Right, Conservative",textsize = 1.7,linewidth = 2,freetype = FALSE)
render_label(temp.data2, x=1,y=101, z=1400, zscale=35,relativez=FALSE,
             linecolor = 'darkred', textcolor = "darkred", text = "Left, Progressive",textsize = 1.7,linewidth = 2,freetype = FALSE)
render_label(temp.data2, x=101,y=101, z=1200, zscale=35, relativez=FALSE,
             linecolor = 'darkblue',textcolor = "darkblue", text = "Right, Progresive",textsize = 1.7,linewidth = 2,freetype = FALSE)

#render_label(temp.data2, x=26,y=101, z=1400, zscale=35, relativez=FALSE, dashed= TRUE,
#             linecolor = 'black', textcolor = "black", text = "Peak of EU support",textsize = 1.5,linewidth = 2,freetype = FALSE, offset=0)
render_label(temp.data2, x=85,y=15, z=300, zscale=35, relativez=FALSE, dashed= TRUE,
             linecolor = 'black', textcolor = "black", text = "Lake of Eurosceptics",textsize = 1.5, linewidth = 2,freetype = FALSE)

render_snapshot()
dev.off()


# Additional 2D and 3D density plots --------------------------------------------------
# 3D histogram with the density mapped as color
smooth <- MASS::kde2d(de.s$lr, de.s$progr, lims = c(-5, 5, -5, 5), n = 11)$z 

# 4 3D plots (Figure A9)
png('./figures/de 2019 dens eu.png', width=900*2, height=700*2, res=96)
par(mfrow = c(2, 2), mar = c(3, 3, 3, 3))
color = rev(rainbow(10, start = 0/6, end = 4/6))

hist3D(z = temp.data[ix,iy], theta = 35, phi = 20, shade=0.2, colvar = smooth, col=color, colkey = FALSE,
       box=T, bty='b2',ticktype = "detailed",xlab="Left-Right", ylab="Conservative-Progressive", zlab="Anti-Pro EU")

hist3D(z = temp.data[ix,iy], theta = 35 + 180, phi = 20, shade=0.2, colvar = smooth, col=color,
       box=T, bty='b2',ticktype = "detailed",xlab="Left-Right", ylab="Conservative-Progressive", zlab="Anti-Pro EU")

hist3D(colvar = temp.data[ix,iy], theta = 35, phi = 20, shade=0.2, z = smooth, col=color, colkey = FALSE,
       box=T, bty='b2',ticktype = "detailed",xlab="Left-Right", ylab="Conservative-Progressive", zlab="Density")

hist3D(colvar = temp.data[ix,iy], theta = 35 + 180, phi = 20, shade=0.2, z = smooth, col=color,
       box=T, bty='b2',ticktype = "detailed",xlab="Left-Right", ylab="Conservative-Progressive", zlab="Density")
dev.off()

# Subjective placement and objective positions: correlation plots (Figure 3) ----------------------------------
png('./figures/de 2019 corplot2.png', width=900, height=900, res=96)

ggpairs(
  de.all [de.all$type=='voter', c('obj.lr', 'obj.cp', 'obj.eu', 'lr','progr','proeu')],
  upper = list(continuous = "cor"),
  #upper = list(continuous = "density"),
  lower = list(continuous = "trends"),
  diag  = list(continuous = "barDiag"),
  axisLabels = 'show',
  columnLabels = c('Obj. Left-Right','Obj. Cons.-Progr.','Obj. Anti-Pro EU',
                   'Left-Right','Conservative-Progressive','Anti-Pro EU')) + 
  theme_bw()
dev.off()

# Objective positions: 3D plots ----------------------------------
de.s2 <- de.all %>% 
  filter (type=='voter') %>% 
  select ('obj.lr','obj.cp','obj.eu') %>% 
  filter(complete.cases(.))

#prepare data
elevation.df = data.frame(x = de.s2$obj.lr, y = de.s2$obj.cp, z = de.s2$obj.eu)
elevation.loess = loess(z ~ x*y, data = elevation.df)

xmin<- -2
xmax<- 2
ymin<- -2
ymax<- 2

elevation.fit = expand.grid(list(x = seq(xmin, xmax, 0.1), y = seq(ymin, ymax, 0.1)))
z = predict(elevation.loess, newdata = elevation.fit)
elevation.fit$Height = as.numeric(z)

#prepare matrix with x and y and rows/cols and z as the cell values
u1<-unique(elevation.fit$x)
u2<-unique(elevation.fit$y)
temp.data<-matrix(NA, nrow=length(u1), ncol=length(u2))

for (i in 1:length(u1)){
  for (j in 1:length(u2)){
    temp.data[i,j]<-elevation.fit$Height[elevation.fit$x==u1[i] & elevation.fit$y==u2[j]]
  }
}

ix <- seq(1, nrow(temp.data), length.out = 11)
iy <- seq(1, ncol(temp.data), length.out = 11)

smooth <- MASS::kde2d(de.s2$obj.lr, de.s2$obj.cp, lims = c(-2, 2, -2, 2), n = 11)$z 

# 3D in black and white (Figure 4, right panel)
png('./figures/de 2019 obj eu3.png', width=900*2, height=700*2, res=96)
par(mfrow = c(1, 1), mar = c(0, 0, 0, 0))
persp3D(z = temp.data, theta = 45, phi = 10, facets = T, curtain = TRUE, 
        shade=0.8, col = ramp.col(c("white", "black")), border = "grey", colkey = FALSE, xlab="Obj. Left-Right", ylab="Obj. Conservative-Progressive", zlab="Obj. Anti-Pro EU")
dev.off()


# Figure A9
png('./figures/de 2019 dens eu3.png', width=900, height=700, res=96)
par(mfrow = c(2, 2), mar = c(1, 1, 1, 1))
color = rev(rainbow(10, start = 0/6, end = 4/6))

hist3D(z = temp.data[ix,iy], theta = 35, phi = 20, shade=0.2, colvar = smooth, col=color, colkey = FALSE, clab='Density',
       box=T, bty='b2',ticktype = "detailed",xlab="Left-Right", ylab="Conservative-Progressive", zlab="Anti-Pro EU")

hist3D(z = temp.data[ix,iy], theta = 35 - 90, phi = 20, shade=0.2, colvar = smooth, col=color, clab='Density',
       box=T, bty='b2',ticktype = "detailed",xlab="Left-Right", ylab="Conservative-Progressive", zlab="Anti-Pro EU")

hist3D(colvar = temp.data[ix,iy], theta = 35, phi = 20, shade=0.2, z = smooth, col=color, colkey = FALSE, clab='Anti-Pro EU',
       box=T, bty='b2',ticktype = "detailed",xlab="Left-Right", ylab="Conservative-Progressive", zlab="Density")

hist3D(colvar = temp.data[ix,iy], theta = 35 - 90, phi = 20, shade=0.2, z = smooth, col=color, clab='Anti-Pro EU',
       box=T, bty='b2',ticktype = "detailed",xlab="Left-Right", ylab="Conservative-Progressive", zlab="Density")
dev.off()
### THE END
